*********************GENERATION OF SCORES************************************************
******************************************************************************************
//*SET USER*//
*Users Identification Code
*User 1
*User 2


// Select the active user (1 or 2). The branch matching $user defines the
// global FH, the path of the Master Folder that the data are read from.
global user 1
set more off

* User 1: enter the Master Folder path between the quotes
if $user == 1 global FH ""

* User 2: enter the Master Folder path after the macro name
if $user == 2 global FH

// Load the data set from the data sub-folder of the Master Folder
use "$FH/data/FH_PHL.dta", clear
******************************************************************************************
//*MEASURES FOR INDIVIDUAL PRACTICES*//
//Note on Nomenclature: Codes for measures of respective business practices are designated before definition of business practice.

*********************************************************************************
// CASH SEPARATION (CS)
// Every measure is built the same way for baseline (prefix b_) and endline
// (prefix e_), so each rule is written once and applied to both waves.
// Answers not covered by a rule are left missing.

// CS_1: are business and household cash kept separate?
// Answer 1 scores 3, answer 0 scores 1.
foreach w in b e {
	local x `w'_cashseparate
	tab `x'
	gen `w'_cs1 = cond(`x' == 1, 3, cond(`x' == 0, 1, .))
	tab `w'_cs1
}

// CS_2: frequency of cash withdrawal from the business.
// Answer 3 scores 3, answer 4 scores 2; answers 1, 2, 5, 6, 7 and negative
// codes score 1.
foreach w in b e {
	local x `w'_cashwithdraw
	tab `x'
	gen `w'_cs2 = 3 if `x' == 3
	replace `w'_cs2 = 2 if `x' == 4
	replace `w'_cs2 = 1 if inlist(`x', 1, 2, 5, 6, 7) | `x' < 0
	tab `w'_cs2, nol
}

// CS_3: are profits calculated?
// Answer 2 scores 3; answer 1 and negative codes score 1.
foreach w in b e {
	local x `w'_profitcal
	tab `x'
	tab `x', nol
	gen `w'_cs3 = cond(`x' == 2, 3, cond(`x' == 1 | `x' < 0, 1, .))
	tab `w'_cs3
}

*********************************************************************************
// CUSTOMER CREDIT (CC)

// CC_1: number of days for which credit is given.
// Only respondents with qc12 != 7 are scored; per the note in the original
// script this keeps only those who give credit.
//   score 3: between 0 and 7 days
//   score 1: more than 7 days (non-missing), or a negative answer code
// FIX: the baseline rule used to be "days <= 7" with no lower bound, so a
// negative answer code received the top score 3, whereas the endline rule
// scored it 1. Both waves now use the endline rule.
foreach w in b e {
	local days `w'_daygivecredit
	tab `w'_qc12
	sum `days'
	gen `w'_cc1 = 3 if inrange(`days', 0, 7) & `w'_qc12 != 7
	replace `w'_cc1 = 1 if (`days' > 7 | `days' < 0) & `days' != . & `w'_qc12 != 7
	tab `w'_cc1
}

// CC_2: what do you do when clients do not pay their dues? (multiple answers)
// The score is the AVERAGE of the points of all answers given, not the
// maximum. Example: a respondent who ticked both "donothing" (1 point) and
// "remindbyvisit" (3 points) sometimes acts and sometimes does not, so should
// score below one who ticked "remindbyvisit" only.
// Points: remindwhenvisit 2, remindbycall 3, remindbyvisit 3, waittocome 1,
// donothing 1. Respondents with no answer equal to 1 are left missing.
foreach w in b e {
	local actions remindwhenvisit remindbycall remindbyvisit waittocome donothing
	local points  2 3 3 1 1

	foreach a of local actions {
		tab `w'_`a'
	}

	// running number of ticked answers and running sum of their points
	gen `w'_cc2_count = 0
	gen `w'_cc2_score = 0
	forvalues k = 1/5 {
		local a : word `k' of `actions'
		local p : word `k' of `points'
		replace `w'_cc2_count = `w'_cc2_count + 1 if `w'_`a' == 1
		replace `w'_cc2_score = `w'_cc2_score + `p' if `w'_`a' == 1
	}

	gen `w'_cc2 = `w'_cc2_score / `w'_cc2_count if `w'_cc2_count != 0
	tab `w'_cc2
	drop `w'_cc2_count `w'_cc2_score
}

// CC_3: do you keep records of your business?
// Not restricted by qc12 because, per the original note, this question
// covers records other than customer credit.
// Answer 1 scores 3; answer 0 and negative codes score 1.
foreach w in b e {
	local x `w'_keeprecord
	tab `x'
	gen `w'_cc3 = cond(`x' == 1, 3, cond(`x' == 0 | `x' < 0, 1, .))
	tab `w'_cc3
}

// CC_4: do you keep records of customer credit?
// Scored only where qc12 != 7 (per the original note: those who give credit).
// Answer 1 scores 3, answer 0 scores 1.
foreach w in b e {
	local x `w'_keepcredrecord
	tab `w'_qc12
	tab `x'
	gen `w'_cc4 = cond(`x' == 1, 3, cond(`x' == 0, 1, .)) if `w'_qc12 != 7
	tab `w'_cc4
}

// CC_5: what is recorded about customer credit?
// Scored only where qc12 != 7. Score 3 when name, date and amount are all
// recorded (all three flags equal 1); every other case with qc12 != 7 scores 1.
foreach w in b e {
	tab `w'_qc12
	foreach f in name date amount {
		tab `w'_record`f' if `w'_qc12 != 7
	}
	gen `w'_cc5 = cond(`w'_recordname == 1 & `w'_recorddate == 1 & `w'_recordamount == 1, 3, 1) if `w'_qc12 != 7
	tab `w'_cc5
}

*********************************************************************************
// INVENTORY MANAGEMENT (IM)

// IM_1: in the past 2 weeks, did you run out of stock of fastest selling items?
// Answer 1 scores 3, answer 2 scores 2; answers 3 to 6 and negative codes score 1.
foreach w in b e {
	local x `w'_outofstock
	tab `x'
	gen `w'_im1 = cond(`x' == 1, 3, cond(`x' == 2, 2, cond(inlist(`x', 3, 4, 5, 6) | `x' < 0, 1, .)))
	tab `w'_im1
}

// IM_2: method used to determine how much to stock.
// Answers 2, 5 score 3; answers 1, 4 score 2; answers 3, 6 and negative codes score 1.
foreach w in b e {
	local x `w'_stockmethod
	tab `x'
	gen `w'_im2 = cond(inlist(`x', 2, 5), 3, cond(inlist(`x', 1, 4), 2, cond(inlist(`x', 3, 6) | `x' < 0, 1, .)))
	tab `w'_im2
}

// IM_3: do you visit competitors to check out goods/prices?
// Answer 5 scores 3; answers 3, 4 score 2; answers 1, 2, 6, 7 and negative codes score 1.
foreach w in b e {
	local x `w'_visitcompet
	tab `x'
	gen `w'_im3 = cond(`x' == 5, 3, cond(inlist(`x', 3, 4), 2, cond(inlist(`x', 1, 2, 6, 7) | `x' < 0, 1, .)))
	tab `w'_im3
}

// IM_4: do you talk to customers to understand what they buy from competitors?
// Answers 1, 2 score 3; answers 3, 4 score 2; answers 5, 6 and negative codes
// score 1.
// FIX: the endline rule omitted the negative-code condition that the baseline
// rule has, so negative endline answers were left missing instead of being
// scored 1. The same rule is now applied to both waves.
foreach w in b e {
	local x `w'_customertalk
	tab `x'
	gen `w'_im4 = 3 if `x' == 1 | `x' == 2
	replace `w'_im4 = 2 if `x' == 3 | `x' == 4
	replace `w'_im4 = 1 if `x' == 5 | `x' == 6 | `x' < 0
	tab `w'_im4
}

// IM_5: did you introduce a new product in the past 2 months?
// Answer 1 scores 3; answer 0 and negative codes score 1.
foreach w in b e {
	local x `w'_newproduct
	tab `x'
	gen `w'_im5 = cond(`x' == 1, 3, cond(`x' == 0 | `x' < 0, 1, .))
	tab `w'_im5
}

// IM_6: on what basis did you decide to introduce a new product?
// Scored only for respondents who introduced a new product (im5 == 3):
//   score 3 if option c or option d was chosen, score 2 otherwise.
// FIX: the condition used to read "c==1 | d==1 & im5==3". In Stata & binds
// tighter than |, so it was evaluated as "c==1 | (d==1 & im5==3)" and anyone
// with option c got score 3 regardless of im5. The parentheses below apply
// the im5 == 3 restriction to both options.
// Note: there are also some answers under qc22_1_othr that are not used here.
foreach w in b e {
	des `w'_qc22_1*
	foreach opt in a b c d {
		tab `w'_qc22_1_`opt'
	}
	gen `w'_im6 = 3 if (`w'_qc22_1_c == 1 | `w'_qc22_1_d == 1) & `w'_im5 == 3
	replace `w'_im6 = 2 if `w'_im5 == 3 & `w'_im6 != 3
	tab `w'_im6
}

*********************************************************************************
// SUPPLIER MANAGEMENT (SM)

// SM_1: do you compare prices and quality of various suppliers?
// Answer 4 scores 3; answers 3, 5 score 2; answers 1, 2, 6, 7 and negative
// codes score 1.
// FIX: the baseline rule omitted answer code 6, which the endline rule scores
// as 1, so baseline respondents with code 6 were left missing. Both waves now
// use the same rule.
// NOTE(review): assumes code 6 means the same in both survey waves - confirm
// against the questionnaires.
foreach w in b e {
	local x `w'_comparesuppliers
	tab `x'
	gen `w'_sm1 = 3 if `x' == 4
	replace `w'_sm1 = 2 if `x' == 3 | `x' == 5
	replace `w'_sm1 = 1 if inlist(`x', 1, 2, 6, 7) | `x' < 0
	tab `w'_sm1
}

// SM_2: do you negotiate prices with suppliers?
// Answers 1 to 6 score 3; answer 7 and negative codes score 1.
foreach w in b e {
	local x `w'_negotiate
	tab `x'
	gen `w'_sm2 = cond(inlist(`x', 1, 2, 3, 4, 5, 6), 3, cond(`x' == 7 | `x' < 0, 1, .))
	tab `w'_sm2
}

// SM_3: did you ask for better terms?
// Answer 1 scores 3, answer 2 scores 2; answers 3, 4 and negative codes score 1.
foreach w in b e {
	local x `w'_askbetterterm
	tab `x'
	gen `w'_sm3 = cond(`x' == 1, 3, cond(`x' == 2, 2, cond(inlist(`x', 3, 4) | `x' < 0, 1, .)))
	tab `w'_sm3
}

// SM_4: were purchases made on cash versus credit in the last month?
// Scored only where qc26 == 1 (per the original note: those who get a discount
// for paying in cash). qc27 == 1 scores 3, qc27 of 2 or 3 scores 2, and every
// other case with qc26 == 1 scores 1.
foreach w in b e {
	tab `w'_qc26, nol
	tab `w'_qc27, nol
	gen `w'_sm4 = cond(`w'_qc27 == 1, 3, cond(inlist(`w'_qc27, 2, 3), 2, 1)) if `w'_qc26 == 1
	tab `w'_sm4
}

******************************************************************************************
// SUMMARIZING SCORES & CORRECTIONS
// Tabulate every item score: all baseline items first, then all endline items.
local stems cs1 cs2 cs3 cc1 cc2 cc3 cc4 cc5 im1 im2 im3 im4 im5 im6 sm1 sm2 sm3 sm4
foreach w in b e {
	foreach s of local stems {
		tab `w'_`s'
	}
}

// Number of respondents with an im6 score although their im5 score is 1
foreach w in b e {
	count if `w'_im6 != . & `w'_im5 == 1
}

/*Note:
corr(im5, im6) is missing in the correlation matrix because im5 does not vary
(it is always 3) wherever im6 is non-missing. Three remedies, in order of
preference:
1) Average im5 and im6 into im5_adj: a new product introduced on a good
   strategy gets 3 points, on an average strategy 2.5 points, and no new
   product at all gets 1 point.
2) Code im6 as 2 (medium) for those who did not introduce a new product.
   cc4 and cc5 are a similar case: those who keep no record of customer credit
   get 1 point on both. The difference is that keeping customer credit records
   is more of a "black and white" question, so the severe penalty is justified
   there.
3) Simply set corr(im5, im6) to 0.
Methods 1 and 2 differ in that method 2 puts more weight on the new-product
questions (both im5 and im6 are kept). Method 1 is used because method 2 puts
too much weight on them: introducing a new product is less of a "black and
white" question.
*/
foreach w in b e {
	egen `w'_im5_adj = rowmean(`w'_im5 `w'_im6)
}
foreach w in b e {
	tab `w'_im5_adj
}

******************************************************************************************
// FRACTION OF BUSINESSES IN DIFFERENT CATEGORIES
// For each index (optimal, training) and each wave (b = baseline, e = endline)
// this section creates:
//   <w>_<index>_count           number of non-missing items
//   <w>_<index>_HighCount       number of items scored exactly 3
//   <w>_<index>_HighFreq        HighCount / count
//   <w>_<index>_AboveMedCount   number of items scored 2 or more
//   <w>_<index>_AboveMedFreq    AboveMedCount / count

// Items entering each index
local optimal_items  cs1 cs3 cc2 cc3 cc4 cc5 im2 im3 im4 im5_adj sm1 sm2 sm3 sm4
local training_items cs1 cs2 cc1 cc2 cc4 cc5 im1 im3 im4 im5_adj sm2 sm3 sm4

// Fraction of items on which a business scores in the "HIGH" category
foreach idx in optimal training {
	foreach w in b e {
		gen `w'_`idx'_count = 0
		gen `w'_`idx'_HighCount = 0
		foreach s of local `idx'_items {
			replace `w'_`idx'_count = `w'_`idx'_count + 1 if `w'_`s' != .
			replace `w'_`idx'_HighCount = `w'_`idx'_HighCount + 1 if `w'_`s' == 3
		}
		tab `w'_`idx'_count
		gen `w'_`idx'_HighFreq = `w'_`idx'_HighCount / `w'_`idx'_count
	}
}

// Fraction of items on which a business scores in the "HIGH/MEDIUM" category (>= 2)
foreach idx in optimal training {
	foreach w in b e {
		gen `w'_`idx'_AboveMedCount = 0
		foreach s of local `idx'_items {
			replace `w'_`idx'_AboveMedCount = `w'_`idx'_AboveMedCount + 1 if `w'_`s' >= 2 & `w'_`s' != .
		}
		gen `w'_`idx'_AboveMedFreq = `w'_`idx'_AboveMedCount / `w'_`idx'_count
	}
}

******************************************************************************************
// CORRECTIONS
// Per the original notes: 198 endline respondents have only 2 non-missing
// answers, namely e_cc4 and e_cc5, and those two should be missing as well.
tab e_training_count if e_optimal_count == 2
tab e_cc4 e_cc5 if e_optimal_count == 2

// Check that these respondents gave no answer to the multiple-answer question
// "What do you record in your business book?". The data in memory are restored
// afterwards, so the helper variable count does not persist.
preserve
tab e_qc14_1_othr if e_optimal_count == 2
tab e_keepcredrecord if e_optimal_count == 2
drop e_qc14_1_othr
egen count = rowtotal(e_qc14_1_*)
tab count if e_optimal_count == 2
restore

// Apply the correction: blank the two item scores ...
foreach v in e_cc4 e_cc5 {
	replace `v' = . if e_optimal_count == 2
}
// ... and the endline frequencies wherever the matching item count is 2
foreach kind in HighFreq AboveMedFreq {
	foreach idx in optimal training {
		replace e_`idx'_`kind' = . if e_`idx'_count == 2
	}
}


*********************************************************************************
// SUMMARY STATS OF FREQUENCIES & CORRELATIONS
// Detailed summaries of the High and High/Medium frequencies
foreach kind in HighFreq AboveMedFreq {
	foreach idx in optimal training {
		foreach w in b e {
			sum `w'_`idx'_`kind', de
		}
	}
}

// Correlation between the two frequency measures, by index and wave
foreach idx in training optimal {
	foreach w in b e {
		corr `w'_`idx'_AboveMedFreq `w'_`idx'_HighFreq
	}
}

// CORRELATION MATRICES, USING PAIRWISE CORRELATION
// Items entering the Optimal Index and the Training Index
local optimal_items  cs1 cs3 cc2 cc3 cc4 cc5 im2 im3 im4 im5_adj sm1 sm2 sm3 sm4
local training_items cs1 cs2 cc1 cc2 cc4 cc5 im1 im3 im4 im5_adj sm2 sm3 sm4

foreach idx in optimal training {
	foreach w in b e {
		// prefix every item of the index with the wave letter
		local vars
		foreach s of local `idx'_items {
			local vars `vars' `w'_`s'
		}
		pwcorr `vars'
	}
}

******************************************************************************************
// GENERATION OF PRACTICE SCORES
// Practice scores as simple row means of the item scores, for a first look at
// regressions with covariates. Missing items are ignored by the row mean.
local optimal_items  cs1 cs3 cc2 cc3 cc4 cc5 im2 im3 im4 im5_adj sm1 sm2 sm3 sm4
local training_items cs1 cs2 cc1 cc2 cc4 cc5 im1 im3 im4 im5_adj sm2 sm3 sm4

// b_optimal_score, e_optimal_score, b_training_score, e_training_score
foreach idx in optimal training {
	foreach w in b e {
		local vars
		foreach s of local `idx'_items {
			local vars `vars' `w'_`s'
		}
		egen `w'_`idx'_score = rowmean(`vars')
	}
}

foreach idx in optimal training {
	foreach w in e b {
		sum `w'_`idx'_score, de
	}
}

// Percentage distribution of every Training Index item, baseline then endline
foreach w in b e {
	foreach s of local training_items {
		table `w'_`s', stat(percent)
	}
}

// GENERATING MODIFIED SCORES
// BASELINE
// Variables scored on 1,3: convert to 0/1 (score 1 becomes 0, score 3 becomes 1).
// A value of 2 would also become 0; any other value is left unchanged.
local binary_items b_cs1 b_cc1 b_cc4 b_cc5 b_sm2

// distribution before the recode
codebook `binary_items'

recode `binary_items' (1 2 = 0) (3 = 1)

// distribution after the recode
codebook `binary_items'

// Variables scored on 1, 2, 3 (baseline): collapse each item to 0/1.
// Two cut points are possible:
//   cut A: scores 1 and 2 become 0, score 3 becomes 1
//   cut B: score 1 becomes 0, scores 2 and 3 become 1
// Cut A is used when the share of non-missing observations scoring 1 or 2 is
// strictly closer to one half than the share scoring 2 or 3; otherwise cut B.
// Values other than 1, 2, 3 (including missing) become missing.
//
// Changes relative to the earlier version of this section:
//  - The branch was written as "if Test == 1 {" with Test a variable. The
//    programming command if evaluates such an expression on the first
//    observation only; the decision is now taken on scalars.
//  - The shares were read from rows 1-3 of the tabulation matrix, which is
//    only right when all three scores occur in the data. They are now counted
//    directly from the values.
// The helper variables <item>_a and <item>_b (distance of each share from one
// half) are still created, so the resulting data set has the same variables.
local items3 b_cs2 b_im1 b_im3 b_im4 b_sm3 b_sm4

foreach var of varlist `items3' {
	tab `var'
	codebook `var'
}

foreach var of varlist `items3' {
	tempname gap_low gap_high

	tab `var'
	quietly count if `var' < .
	local n_valid = r(N)

	// Distances are rounded to float precision, as they were when held in
	// float variables, so exact ties are not decided by rounding noise.
	quietly count if inlist(`var', 1, 2)
	scalar `gap_low' = float(abs(0.5 - float(r(N) / `n_valid')))
	quietly count if inlist(`var', 2, 3)
	scalar `gap_high' = float(abs(0.5 - float(r(N) / `n_valid')))

	gen `var'_a = `gap_low'
	gen `var'_b = `gap_high'

	gen `var'_New = .
	if `gap_low' < `gap_high' {
		// cut A
		replace `var'_New = 0 if `var' == 1 | `var' == 2
		replace `var'_New = 1 if `var' == 3
	}
	else {
		// cut B
		replace `var'_New = 0 if `var' == 1
		replace `var'_New = 1 if `var' == 2 | `var' == 3
	}

	drop `var'
	rename `var'_New `var'
}

foreach var of varlist `items3' {
	tab `var'
	codebook `var'
}


// Variables scored 1, 2.5 & 3 (baseline): collapse b_im5_adj to 0/1.
//   cut A: 1 and 2.5 become 0, 3 becomes 1
//   cut B: 1 becomes 0, 2.5 and 3 become 1
// Cut A is used when the share of non-missing observations scoring 1 or 2.5 is
// strictly closer to one half than the share scoring 2.5 or 3; otherwise cut B.
// Any other value (including missing) becomes missing.
//
// Changes relative to the earlier version of this section:
//  - "if Test == 1 {" on a variable only tests the first observation; the
//    decision is now taken on scalars.
//  - The shares were read from rows 1-3 of the tabulation matrix, which is
//    wrong whenever the variable does not take exactly the values 1, 2.5, 3;
//    they are now counted directly from the values.
// The helper variables b_im5_adj_a and b_im5_adj_b are still created.
tempname gap_low gap_high

tab b_im5_adj
quietly count if b_im5_adj < .
local n_valid = r(N)

// Distances are rounded to float precision, as they were when held in float
// variables, so exact ties are not decided by rounding noise.
quietly count if inlist(b_im5_adj, 1, 2.5)
scalar `gap_low' = float(abs(0.5 - float(r(N) / `n_valid')))
quietly count if inlist(b_im5_adj, 2.5, 3)
scalar `gap_high' = float(abs(0.5 - float(r(N) / `n_valid')))

gen b_im5_adj_a = `gap_low'
gen b_im5_adj_b = `gap_high'

gen b_im5_adj_New = .
if `gap_low' < `gap_high' {
	// cut A
	replace b_im5_adj_New = 0 if b_im5_adj == 1 | b_im5_adj == 2.5
	replace b_im5_adj_New = 1 if b_im5_adj == 3
}
else {
	// cut B
	replace b_im5_adj_New = 0 if b_im5_adj == 1
	replace b_im5_adj_New = 1 if b_im5_adj == 2.5 | b_im5_adj == 3
}

drop b_im5_adj
rename b_im5_adj_New b_im5_adj


// Variables scored on other scales (baseline): collapse b_cc2 to 0/1.
// b_cc2 is an average of answer points, so it takes many distinct values.
//   cut A: values from 1 to 2 become 0, values above 2 become 1
//   cut B: values below 2 become 0, values from 2 to 3 become 1
// Cut A is used when the share of non-missing observations with b_cc2 <= 2 is
// strictly closer to one half than the share with b_cc2 >= 2; otherwise cut B.
// Missing values stay missing.
//
// Changes relative to the earlier version of this section:
//  - The two shares were summed from hard-coded rows of the tabulation matrix
//    (rows 1-6 and rows 6-11), which is only right when exactly 11 distinct
//    values occur with the value 2 in row 6. They are now counted from the
//    values themselves.
//  - "if Test == 1 {" on a variable only tests the first observation; the
//    decision is now taken on scalars.
// The helper variables b_cc2_a and b_cc2_b are still created.
tempname gap_low gap_high

tab b_cc2
quietly count if b_cc2 < .
local n_valid = r(N)

// Distances are rounded to float precision, as they were when held in float
// variables, so exact ties are not decided by rounding noise.
quietly count if b_cc2 <= 2
scalar `gap_low' = float(abs(0.5 - float(r(N) / `n_valid')))
quietly count if b_cc2 >= 2 & b_cc2 < .
scalar `gap_high' = float(abs(0.5 - float(r(N) / `n_valid')))

gen b_cc2_a = `gap_low'
gen b_cc2_b = `gap_high'

gen b_cc2_New = .
if `gap_low' < `gap_high' {
	// cut A
	replace b_cc2_New = 0 if inrange(b_cc2, 1, 2)
	replace b_cc2_New = 1 if b_cc2 > 2 & b_cc2 < .
}
else {
	// cut B
	replace b_cc2_New = 0 if b_cc2 < 2
	replace b_cc2_New = 1 if inrange(b_cc2, 2, 3)
}

drop b_cc2
rename b_cc2_New b_cc2
codebook b_cc2


// ENDLINE
// Variables scored on 1,3: convert to 0/1 (score 1 becomes 0, score 3 becomes 1).
// A value of 2 would also become 0; any other value is left unchanged.
local binary_items e_cs1 e_cc1 e_cc4 e_cc5 e_sm2

// distribution before the recode
foreach item of local binary_items {
	tab `item'
}

recode `binary_items' (1 2 = 0) (3 = 1)


// Variables scored on 1, 2, 3 (endline): collapse each item to 0/1.
// Two cut points are possible:
//   cut A: scores 1 and 2 become 0, score 3 becomes 1
//   cut B: score 1 becomes 0, scores 2 and 3 become 1
// Cut A is used when the share of non-missing observations scoring 1 or 2 is
// strictly closer to one half than the share scoring 2 or 3; otherwise cut B.
// Values other than 1, 2, 3 (including missing) become missing.
//
// Changes relative to the earlier version of this section:
//  - The branch was written as "if Test == 1 {" with Test a variable. The
//    programming command if evaluates such an expression on the first
//    observation only; the decision is now taken on scalars.
//  - The shares were read from rows 1-3 of the tabulation matrix, which is
//    only right when all three scores occur in the data. They are now counted
//    directly from the values.
// The helper variables <item>_a and <item>_b (distance of each share from one
// half) are still created, so the resulting data set has the same variables.
local items3 e_cs2 e_im1 e_im3 e_im4 e_sm3 e_sm4

foreach var of varlist `items3' {
	tab `var'
	codebook `var'
}

foreach var of varlist `items3' {
	tempname gap_low gap_high

	tab `var'
	quietly count if `var' < .
	local n_valid = r(N)

	// Distances are rounded to float precision, as they were when held in
	// float variables, so exact ties are not decided by rounding noise.
	quietly count if inlist(`var', 1, 2)
	scalar `gap_low' = float(abs(0.5 - float(r(N) / `n_valid')))
	quietly count if inlist(`var', 2, 3)
	scalar `gap_high' = float(abs(0.5 - float(r(N) / `n_valid')))

	gen `var'_a = `gap_low'
	gen `var'_b = `gap_high'

	gen `var'_New = .
	if `gap_low' < `gap_high' {
		// cut A
		replace `var'_New = 0 if `var' == 1 | `var' == 2
		replace `var'_New = 1 if `var' == 3
	}
	else {
		// cut B
		replace `var'_New = 0 if `var' == 1
		replace `var'_New = 1 if `var' == 2 | `var' == 3
	}

	drop `var'
	rename `var'_New `var'
}

// Variables scored 1, 2.5 & 3 (endline): collapse e_im5_adj to 0/1.
//   cut A: 1 and 2.5 become 0, 3 becomes 1
//   cut B: 1 becomes 0, 2.5 and 3 become 1
// Cut A is used when the share of non-missing observations scoring 1 or 2.5 is
// strictly closer to one half than the share scoring 2.5 or 3; otherwise cut B.
// Any other value (including missing) becomes missing.
//
// Changes relative to the earlier version of this section:
//  - "if Test == 1 {" on a variable only tests the first observation; the
//    decision is now taken on scalars.
//  - The shares were read from rows 1-3 of the tabulation matrix, which is
//    wrong whenever the variable does not take exactly the values 1, 2.5, 3;
//    they are now counted directly from the values.
//  - The missing-value line in the first branch tested e_im5_adj_New instead
//    of e_im5_adj. It had no effect; the line is not needed because the new
//    variable starts out missing.
// The helper variables e_im5_adj_a and e_im5_adj_b are still created.
tempname gap_low gap_high

tab e_im5_adj
quietly count if e_im5_adj < .
local n_valid = r(N)

// Distances are rounded to float precision, as they were when held in float
// variables, so exact ties are not decided by rounding noise.
quietly count if inlist(e_im5_adj, 1, 2.5)
scalar `gap_low' = float(abs(0.5 - float(r(N) / `n_valid')))
quietly count if inlist(e_im5_adj, 2.5, 3)
scalar `gap_high' = float(abs(0.5 - float(r(N) / `n_valid')))

gen e_im5_adj_a = `gap_low'
gen e_im5_adj_b = `gap_high'

gen e_im5_adj_New = .
if `gap_low' < `gap_high' {
	// cut A
	replace e_im5_adj_New = 0 if e_im5_adj == 1 | e_im5_adj == 2.5
	replace e_im5_adj_New = 1 if e_im5_adj == 3
}
else {
	// cut B
	replace e_im5_adj_New = 0 if e_im5_adj == 1
	replace e_im5_adj_New = 1 if e_im5_adj == 2.5 | e_im5_adj == 3
}

drop e_im5_adj
rename e_im5_adj_New e_im5_adj


// Variables scored on other scales (endline): collapse e_cc2 to 0/1.
// e_cc2 is an average of answer points, so it takes many distinct values.
//   cut A: values from 1 to 2 become 0, values above 2 become 1
//   cut B: values below 2 become 0, values from 2 to 3 become 1
// Cut A is used when the share of non-missing observations with e_cc2 <= 2 is
// strictly closer to one half than the share with e_cc2 >= 2; otherwise cut B.
// Missing values stay missing.
//
// Changes relative to the earlier version of this section:
//  - The two shares were summed from hard-coded rows of the tabulation matrix,
//    and the sums were wrong: row 5 was added twice to the first share and
//    row 6 was left out of the second. The shares are now counted from the
//    values themselves, which does not depend on how many distinct values
//    occur in the data.
//  - "if Test == 1 {" on a variable only tests the first observation; the
//    decision is now taken on scalars.
// The helper variables e_cc2_a and e_cc2_b are still created.
tempname gap_low gap_high

tab e_cc2
quietly count if e_cc2 < .
local n_valid = r(N)

// Distances are rounded to float precision, as they were when held in float
// variables, so exact ties are not decided by rounding noise.
quietly count if e_cc2 <= 2
scalar `gap_low' = float(abs(0.5 - float(r(N) / `n_valid')))
quietly count if e_cc2 >= 2 & e_cc2 < .
scalar `gap_high' = float(abs(0.5 - float(r(N) / `n_valid')))

gen e_cc2_a = `gap_low'
gen e_cc2_b = `gap_high'

gen e_cc2_New = .
if `gap_low' < `gap_high' {
	// cut A
	replace e_cc2_New = 0 if inrange(e_cc2, 1, 2)
	replace e_cc2_New = 1 if e_cc2 > 2 & e_cc2 < .
}
else {
	// cut B
	replace e_cc2_New = 0 if e_cc2 < 2
	replace e_cc2_New = 1 if inrange(e_cc2, 2, 3)
}

drop e_cc2
rename e_cc2_New e_cc2

// Inspect the recoded Training Index items, then average them into the
// modified training scores (row mean over the non-missing items).
local training_items cs1 cs2 cc1 cc2 cc4 cc5 im1 im3 im4 im5_adj sm2 sm3 sm4

foreach w in b e {
	foreach s of local training_items {
		codebook `w'_`s'
	}
}

// b_training_score2 (baseline) and e_training_score2 (endline)
foreach w in b e {
	local vars
	foreach s of local training_items {
		local vars `vars' `w'_`s'
	}
	egen `w'_training_score2 = rowmean(`vars')
}

// Ratio of the w1 profit variable to the w1 sales variable, by wave
foreach w in b e {
	gen sc`w'_profitregwk = w1`w'_profitregwk / w1`w'_salesregwk
}

// log(1 + x) of regular-week sales and profits
foreach outcome in sales profit {
	foreach w in b e {
		gen log_`w'_`outcome'regwk = ln(1 + `w'_`outcome'regwk)
	}
}

// inverse hyperbolic sine of regular-week sales and profits
foreach outcome in sales profit {
	foreach w in b e {
		gen asinh_`w'_`outcome'regwk = asinh(`w'_`outcome'regwk)
	}
}

// Winsorized versions with p(0.02) and p(0.1), named w2<...> and w10<...>
foreach pct in 2 10 {
	local p = `pct' / 100
	foreach outcome in sales profit {
		foreach w in b e {
			winsor `w'_`outcome'regwk, p(`p') gen(w`pct'`w'_`outcome'regwk)
		}
	}
}

// Training score without the cash separation component (cs1 and cs2 left out)
local no_cs_items cc1 cc2 cc4 cc5 im1 im3 im4 im5_adj sm2 sm3 sm4
foreach w in b e {
	local vars
	foreach s of local no_cs_items {
		local vars `vars' `w'_`s'
	}
	egen `w'_training_score3 = rowmean(`vars')
}


**********************************************************************************************
//Based on the suggestions given by the WBER referees, it would make sense to try out different variations of the outcome variables. We will be sticking to Optimal Index, Regular week sales and regular week profits as outcome variables

/*winsor b_salesregwk, p(0.01) gen(test)
gen ok =. 
replace ok = 1 if w1b_salesregwk == test
tab ok
drop ok
drop test*/

/*winsor b_profitregwk, p(0.01) gen(test)
gen ok =. 
replace ok = 1 if w1b_profitregwk == test
tab ok
drop ok
drop test*/

/*winsor b_salesregwk, p(0.05) gen(test)
gen ok =. 
replace ok = 1 if w5b_salesregwk == test
tab ok
drop ok
drop test*/

/*winsor b_profitregwk, p(0.05) gen(test)
gen ok =. 
replace ok = 1 if w5b_profitregwk == test
tab ok
drop ok
drop test*/


/*winsor e_salesregwk, p(0.01) gen(test)
gen ok =. 
replace ok = 1 if w1e_salesregwk == test
tab ok
drop ok
drop test*/

/*winsor e_profitregwk, p(0.01) gen(test)
gen ok =. 
replace ok = 1 if w1e_profitregwk == test
tab ok
drop ok
drop test*/

/*winsor e_salesregwk, p(0.05) gen(test)
gen ok =. 
replace ok = 1 if w5e_salesregwk == test
tab ok
drop ok
drop test*/

/*winsor e_profitregwk, p(0.05) gen(test)
gen ok =. 
replace ok = 1 if w5e_profitregwk == test
tab ok
drop ok
drop test*/
******************************************************************************************
/*Generating Baseline Winsorized values (We already have 1% & 5% winsorized values)
summarize b_salesregwk, de
/*histogram e_salesregwk, frequency normal scheme(s1gcolor)*/
winsor b_salesregwk, p(0.1) gen(w10b_salesregwk)
winsor b_salesregwk, p(0.15) gen(w15b_salesregwk)
gen log_b_salesregwk = log(1+b_salesregwk)
gen asinh_b_salesregwk = asinh(b_salesregwk)

summarize b_profitregwk, de
/*histogram e_profitregwk, frequency normal scheme(s1gcolor)*/
winsor b_profitregwk, p(0.1) gen(w10b_profitregwk)
winsor b_profitregwk, p(0.15) gen(w15b_profitregwk)
gen log_b_profitregwk = log(1+b_profitregwk)
gen asinh_b_profitregwk = asinh(b_profitregwk)

//Tabulating Values to see change
summarize b_salesregwk, de 
summarize w1b_salesregwk, de
summarize w5b_salesregwk, de
summarize w10b_salesregwk, de
summarize w15b_salesregwk, de

summarize b_profitregwk, de
summarize w1b_profitregwk, de
summarize w5b_profitregwk, de
summarize w10b_profitregwk, de
summarize w15b_profitregwk, de

***********************************************************************************************
 
//Generating Endline Winsorized values (We already have 1% & 5% winsorized values)
summarize e_salesregwk, de
/*histogram e_salesregwk, frequency normal scheme(s1gcolor)*/
winsor e_salesregwk, p(0.1) gen(w10e_salesregwk)
winsor e_salesregwk, p(0.15) gen(w15e_salesregwk)
gen log_e_salesregwk = log(1+e_salesregwk)
gen asinh_e_salesregwk = asinh(e_salesregwk)

summarize e_profitregwk, de
/*histogram e_profitregwk, frequency normal scheme(s1gcolor)*/
winsor e_profitregwk, p(0.1) gen(w10e_profitregwk)
winsor e_profitregwk, p(0.15) gen(w15e_profitregwk)
gen log_e_profitregwk = log(1+e_profitregwk)
gen asinh_e_profitregwk = asinh(e_profitregwk)

//Tabulating Values to see change
summarize e_salesregwk, de 
summarize w1e_salesregwk, de
summarize w5e_salesregwk, de
summarize w10e_salesregwk, de
summarize w15e_salesregwk, de

summarize e_profitregwk, de
summarize w1e_profitregwk, de
summarize w5e_profitregwk, de
summarize w10e_profitregwk, de
summarize w15e_profitregwk, de
*/
